dir()
[1] "Achilles_gene_effect (2).csv"
[2] "CCLE_expression (2).csv"
[3] "genes_PPARGcrisper_dup.txt"
[4] "MET500_B37_PPARG_RXRA.landview"
[5] "MET500_PPARG_RXRA.pdf"
[6] "PPARG in Avana associations.csv"
[7] "PPARG_dep_genes.txt"
[8] "PPARG_functional_genomics_summary.xlsx"
[9] "PPARG_Genomics_summary.pptx"
[10] "PPARG_RXRA_TCGA.txt"
[11] "PPARGcrisper_CNV_associations.csv"
[12] "PPARGcrisper_crisper_associations.csv"
[13] "PPARGcrisper_expr_associations.csv"
[14] "PPARGcrisper_mut_associations.csv"
[15] "predictive_biomarkers_PPARG.nb.html"
[16] "predictive_biomarkers_PPARG.Rmd"
[17] "RXRA in Avana associations.csv"
[18] "RXRA_CNV_genes.txt"
[19] "RXRA_dep_genes.txt"
[20] "RXRA_nonCNV_genes.txt"
[21] "RXRAcrisper_dep_genes.txt"
[22] "sample_info (1).csv"
[23] "Screen Shot 2021-01-20 at 12.01.53 PM.png"
[24] "Screen Shot 2021-01-20 at 12.02.00 PM.png"
[25] "Screen Shot 2021-01-20 at 12.02.08 PM.png"
[26] "Screen Shot 2021-01-20 at 12.02.30 PM.png"
[27] "Screen Shot 2021-01-20 at 12.02.37 PM.png"
[28] "TCGA_B37_PPARG_RXRA.landview"
[29] "TCGA_PPARG_RXRA.pdf"
# Load the DepMap association table for PPARG (columns Gene.Compound, Dataset,
# Correlation) and show its first three rows.
pparg_associations <- read.csv("PPARG in Avana associations.csv")
pparg_associations[1:3,]
Gene.Compound Dataset Correlation
1 RXRA CRISPR (Avana) DepMap Consortium 20Q4 0.315
2 KLF5 CRISPR (Avana) DepMap Consortium 20Q4 0.245
3 FERMT1 CRISPR (Avana) DepMap Consortium 20Q4 0.234
unique(pparg_associations$Dataset)
[1] CRISPR (Avana) DepMap Consortium 20Q4 Expression DepMap Consortium 20Q4
[3] Copy Number DepMap Consortium 20Q4 Copy Number (Absolute)
[5] Mutation DepMap Consortium 20Q4
5 Levels: Copy Number (Absolute) ... Mutation DepMap Consortium 20Q4
# Load the CCLE expression table: one row per cell line (ID in column X), one
# column per gene. Preview two rows and the first five columns.
ccle_expr <- read.csv("CCLE_expression (2).csv")
ccle_expr[1:2,1:5]
X TSPAN6..7105. TNMD..64102. DPM1..8813. SCYL3..57147.
1 ACH-001113 4.990501 0.0000000 7.273702 2.765535
2 ACH-001289 5.209843 0.5459684 7.070604 2.538538
# Load the Achilles gene-effect table: one row per cell line (ID in column X),
# one column per gene. Preview three rows and the first five columns.
achilles <- read.csv("Achilles_gene_effect (2).csv")
achilles[1:3,1:5]
X A1BG..1. A1CF..29974. A2M..2. A2ML1..144568.
1 ACH-000004 0.18074075 0.09016459 -0.19582862 -0.01454772
2 ACH-000005 -0.09021400 0.24210788 0.18906821 0.15878206
3 ACH-000007 0.06753821 0.07324729 -0.06541093 0.15577988
# Index both tables by the cell-line ID stored in column X so that rows can be
# selected by ID later on.
rownames(ccle_expr) <- ccle_expr[["X"]]
rownames(achilles) <- achilles[["X"]]
#install.packages("DescTools")
library(DescTools)
Attaching package: ‘DescTools’
The following object is masked from ‘package:data.table’:
%like%
# Split the association table by data set: genes whose expression is associated
# with PPARG dependency, and genes whose CRISPR dependency is.
is_expr_row <- pparg_associations$Dataset == "Expression DepMap Consortium 20Q4"
is_crispr_row <- pparg_associations$Dataset == "CRISPR (Avana) DepMap Consortium 20Q4"
expr_genes <- pparg_associations$Gene.Compound[is_expr_row]
crispr_genes <- pparg_associations$Gene.Compound[is_crispr_row]
crispr_genes
[1] RXRA KLF5 FERMT1 IQSEC1 EGFR CRKL DOCK5
[8] INS APOD ERRFI1 RAD51 NKIRAS1 LRRC2 GRHL2
[15] THRB FAM92B CAND2 CYP2W1 GRK2 YES1 TIMP4
[22] ZNF346 TUBB4B ELMO3 CHMP4B DENND3 SGMS2 BNIP3L
[29] ARHGEF7 ANKRD33 CIDEC CASKIN2 CPNE7 MKRN2 FOXQ1
[36] OSBPL11 BRK1 SOX13 BZW2 FARP1 UPF3A LRRC49
[43] DVL3 ABI1 AJUBA LIMD1 NACA KRAS UBE2R2
[50] EXOG MIR1915HG GGA3 SYT10 KAT2B TMEM40 CDC25C
[57] ARFRP1 GLI4 HEG1 CAP1 RAF1 EVA1C TRPM2
[64] SLC5A11 PTPRE ITGA3 USP4 TXNRD1 ZCWPW2 KCNT2
[71] BUB3 DYNLRB1 C1QL4 EFR3A TGM2 HOXA9 OCM
[78] ILK RAB10 AFF1 PDCD10 PLD6 MICOS10 NCEH1
[85] TMEM42 TXLNA SERPIND1 TPPP2 ZIC5 STT3A SRSF11
[92] SEC11A SLURP2 SEMA4B CAMK1 OR10G7 GATD1 FANCD2OS
[99] CHL1 GADL1
456 Levels: A1BG ABALON ABI1 ACAD9 ACOX3 ACSL5 ADGRF1 AFF1 AJUBA AKT1 ... ZZEF1
expr_genes
[1] PPARG GKN1 PRR15 PLEKHG6 MYZAP TINAGL1 VGLL1 ITGB6
[9] ARL14 SOWAHB B3GNT3 PSCA UPK2 CNGA1 CLDN4 MAP3K12
[17] ESRP1 SOX12 HSD3B1 SYT8 KLF5 VSIG2 C1orf116 RHOD
[25] AMPD2 TMEM139 DSG2 ATP8B2 GCOM1 UGT1A10 ST14 S100P
[33] PLA2G10 MPZL2 GJB3 SERPINB5 PRR3 KRT19 FOXQ1 ADGRF1
[41] PFAS C1orf210 DYRK3 MKRN2OS HS3ST1 CNTROB MSTO1 CRYBG2
[49] MAL2 CDH1 DVL2 ARRB2 PLEK2 CHMP4C SLC52A3 GJB4
[57] C6orf132 RNF223 MST1R SPRR3 KDF1 C11orf52 CLDN7 RAB25
[65] OVOL1 FXYD3 C1orf68 TNFRSF21 REN STARD9 PCDH1 IL20RA
[73] SLC9B2 EPCAM LAD1 ERP27 ACSL5 CLDN23 ELF3 ST3GAL3
[81] AP4B1 GRHL2 ALPP FAM189B PTGES TMEM45B NR1H3 DXO
[89] MALL UPK3B LIPH GPR87 TOP3A NET1 RABEP1 KRT7
[97] A1BG PERP APBA3 MARVELD2
456 Levels: A1BG ABALON ABI1 ACAD9 ACOX3 ACSL5 ADGRF1 AFF1 AJUBA AKT1 ... ZZEF1
# One DescTools %like% pattern per expression gene: the gene symbol, a literal
# ".", any single character, then the "%" wildcard, so that e.g. "CDH1" lines
# up with a column name of the form "CDH1..999.".
expr_pattern <- paste0(expr_genes,"\\..%")
expr_pattern
[1] "PPARG\\..%" "GKN1\\..%" "PRR15\\..%" "PLEKHG6\\..%"
[5] "MYZAP\\..%" "TINAGL1\\..%" "VGLL1\\..%" "ITGB6\\..%"
[9] "ARL14\\..%" "SOWAHB\\..%" "B3GNT3\\..%" "PSCA\\..%"
[13] "UPK2\\..%" "CNGA1\\..%" "CLDN4\\..%" "MAP3K12\\..%"
[17] "ESRP1\\..%" "SOX12\\..%" "HSD3B1\\..%" "SYT8\\..%"
[21] "KLF5\\..%" "VSIG2\\..%" "C1orf116\\..%" "RHOD\\..%"
[25] "AMPD2\\..%" "TMEM139\\..%" "DSG2\\..%" "ATP8B2\\..%"
[29] "GCOM1\\..%" "UGT1A10\\..%" "ST14\\..%" "S100P\\..%"
[33] "PLA2G10\\..%" "MPZL2\\..%" "GJB3\\..%" "SERPINB5\\..%"
[37] "PRR3\\..%" "KRT19\\..%" "FOXQ1\\..%" "ADGRF1\\..%"
[41] "PFAS\\..%" "C1orf210\\..%" "DYRK3\\..%" "MKRN2OS\\..%"
[45] "HS3ST1\\..%" "CNTROB\\..%" "MSTO1\\..%" "CRYBG2\\..%"
[49] "MAL2\\..%" "CDH1\\..%" "DVL2\\..%" "ARRB2\\..%"
[53] "PLEK2\\..%" "CHMP4C\\..%" "SLC52A3\\..%" "GJB4\\..%"
[57] "C6orf132\\..%" "RNF223\\..%" "MST1R\\..%" "SPRR3\\..%"
[61] "KDF1\\..%" "C11orf52\\..%" "CLDN7\\..%" "RAB25\\..%"
[65] "OVOL1\\..%" "FXYD3\\..%" "C1orf68\\..%" "TNFRSF21\\..%"
[69] "REN\\..%" "STARD9\\..%" "PCDH1\\..%" "IL20RA\\..%"
[73] "SLC9B2\\..%" "EPCAM\\..%" "LAD1\\..%" "ERP27\\..%"
[77] "ACSL5\\..%" "CLDN23\\..%" "ELF3\\..%" "ST3GAL3\\..%"
[81] "AP4B1\\..%" "GRHL2\\..%" "ALPP\\..%" "FAM189B\\..%"
[85] "PTGES\\..%" "TMEM45B\\..%" "NR1H3\\..%" "DXO\\..%"
[89] "MALL\\..%" "UPK3B\\..%" "LIPH\\..%" "GPR87\\..%"
[93] "TOP3A\\..%" "NET1\\..%" "RABEP1\\..%" "KRT7\\..%"
[97] "A1BG\\..%" "PERP\\..%" "APBA3\\..%" "MARVELD2\\..%"
colnames(ccle_expr)[colnames(ccle_expr) %like any% c("CDH1\\..%")]
[1] "CDH1..999."
# Keep the expression columns whose name matches any of the gene patterns.
expr_colnames <- colnames(ccle_expr)
matches_any_gene <- expr_colnames %like any% expr_pattern
expr_genes_matched <- expr_colnames[matches_any_gene]
expr_genes_matched
[1] "HS3ST1..9957." "DVL2..1856." "PLEKHG6..55200."
[4] "APBA3..9546." "IL20RA..53832." "VSIG2..23584."
[7] "NR1H3..10062." "RABEP1..9135." "CDH1..999."
[10] "DSG2..1829." "PLA2G10..8399." "GRHL2..79977."
[13] "FXYD3..5349." "PLEK2..26499." "SLC52A3..113278."
[16] "VGLL1..51442." "KLF5..688." "ESRP1..54845."
[19] "UPK2..7379." "PERP..64065." "ITGB6..3694."
[22] "AMPD2..271." "EPCAM..4072." "A1BG..1."
[25] "MSTO1..55154." "ST3GAL3..6487." "PPARG..5468."
[28] "RAB25..57111." "AP4B1..10717." "KRT7..3855."
[31] "GCOM1..145781." "GPR87..53836." "ERP27..121506."
[34] "MAP3K12..7786." "ARRB2..409." "TINAGL1..64129."
[37] "DYRK3..8444." "ATP8B2..57198." "REN..5972."
[40] "MALL..7851." "TNFRSF21..27242." "MAL2..114569."
[43] "PTGES..9536." "SYT8..90019." "C11orf52..91894."
[46] "ST14..6768." "MPZL2..10205." "TMEM45B..120224."
[49] "MARVELD2..153562." "ADGRF1..266977." "PCDH1..5097."
[52] "LAD1..3898." "STARD9..57519." "FAM189B..10712."
[55] "SPRR3..6707." "ALPP..250." "ELF3..1999."
[58] "LIPH..200879." "S100P..6286." "SLC9B2..133308."
[61] "MST1R..4486." "FOXQ1..94234." "CHMP4C..92421."
[64] "PSCA..8000." "GKN1..56287." "CNTROB..116840."
[67] "KRT19..3880." "OVOL1..5017." "RHOD..29984."
[70] "NET1..10276." "KDF1..126695." "CRYBG2..55057."
[73] "PRR15..222171." "TOP3A..7156." "SOX12..6666."
[76] "TMEM139..135932." "PFAS..5198." "ARL14..80117."
[79] "B3GNT3..10331." "CLDN7..1366." "C1orf116..79098."
[82] "SOWAHB..345079." "C6orf132..647024." "GJB3..2707."
[85] "CLDN4..1364." "GJB4..127534." "ACSL5..51703."
[88] "CNGA1..1259." "C1orf68..100129271." "HSD3B1..3283."
[91] "DXO..1797." "PRR3..80742." "SERPINB5..5268."
[94] "MKRN2OS..100129480." "RNF223..401934." "UGT1A10..54575."
[97] "UPK3B..105375355." "C1orf210..149466." "CLDN23..137075."
[100] "MYZAP..100820829."
colnames(achilles)[colnames(achilles) %like any% c("PPARG\\..%")]
[1] "PPARG..5468."
length(achilles$PPARG..5468.)
[1] 811
# Cell lines that have a PPARG gene-effect score and also appear in the
# expression table.
has_pparg_score <- !is.na(achilles$PPARG..5468.)
shared_cell_lines <- intersect(achilles$X[has_pparg_score], ccle_expr$X)
length(shared_cell_lines)
[1] 790
#install.packages("glmnet")
# NOTE(review): require() only reports a missing package and returns FALSE, so
# the script carries on without RCurl. Nothing in the visible script appears to
# use RCurl -- confirm, then either drop this line or switch to library().
require(RCurl);
Loading required package: RCurl
there is no package called ‘RCurl’
# NOTE(review): same as above -- caret is not installed and require() does not
# stop the script. Nothing in the visible script appears to use caret; confirm.
require(caret);
Loading required package: caret
there is no package called ‘caret’
library(data.table) # provides enhanced data.frame
library(ggplot2) # plotting
library(glmnet) # ridge, elastic net, and lasso
Loading required package: Matrix
Attaching package: ‘Matrix’
The following objects are masked from ‘package:tidyr’:
expand, pack, unpack
Loaded glmnet 4.1
# Elastic-net model of PPARG CRISPR dependency from raw expression values.
# glmnet needs a numeric predictor matrix (x) and a numeric response vector (y).
y <- achilles[shared_cell_lines, ]$PPARG..5468.  # response: PPARG gene effect
#x = model.matrix(y~.,ccle_expr[shared_cell_lines,expr_genes_matched]) # matrix of predictors
x <- as.matrix(ccle_expr[shared_cell_lines, expr_genes_matched])  # predictors
set.seed(123)  # make the cross-validation folds reproducible
en_model <- cv.glmnet(x, y, alpha = 0.5)  # 0 < alpha < 1 gives an elastic net
best_lambda_en <- en_model$lambda.1se  # largest lambda within 1 SE of the minimum
# Ask glmnet for the coefficients at lambda.1se instead of locating the column
# with a floating-point `==` on the lambda path. coef() also returns the
# intercept, which is dropped so only the per-gene coefficients remain.
en_coef_all <- as.matrix(coef(en_model, s = "lambda.1se"))
en_coef <- en_coef_all[rownames(en_coef_all) != "(Intercept)", 1]
coef_en <- data.table(elasticNet = en_coef)  # build table
coef_en[, feature := names(en_coef)]  # add feature names
to_plot_r <- melt(
  coef_en,  # long format: one row per (feature, model)
  id.vars = "feature",
  variable.name = "model",
  value.name = "coefficient"
)
ggplot(data = to_plot_r,  # plot coefficients
       aes(x = feature, y = coefficient, fill = model)) +
  coord_flip() +
  geom_bar(stat = "identity", fill = "brown4", color = "blue") +
  facet_wrap(~ model) +
  guides(fill = "none")  # guides(fill = FALSE) is deprecated in ggplot2
# Elastic-net model of PPARG CRISPR dependency from column-scaled expression
# values. glmnet needs a numeric predictor matrix and a numeric response vector.
y <- achilles[shared_cell_lines, ]$PPARG..5468.  # response: PPARG gene effect
#x = model.matrix(y~.,ccle_expr[shared_cell_lines,expr_genes_matched]) # matrix of predictors
x <- as.matrix(ccle_expr[shared_cell_lines, expr_genes_matched])  # predictors
# Centre and scale every gene column. The centring and scaling constants are
# kept as attributes of scaled.x ("scaled:center", "scaled:scale"); any new data
# passed to this model must be transformed with the same constants.
scaled.x <- scale(x)
set.seed(123)  # make the cross-validation folds reproducible
en_model <- cv.glmnet(scaled.x, y, alpha = 0.5)  # 0 < alpha < 1 gives an elastic net
best_lambda_en <- en_model$lambda.1se  # largest lambda within 1 SE of the minimum
# Ask glmnet for the coefficients at lambda.1se instead of locating the column
# with a floating-point `==` on the lambda path. coef() also returns the
# intercept, which is dropped so only the per-gene coefficients remain.
en_coef_all <- as.matrix(coef(en_model, s = "lambda.1se"))
en_coef <- en_coef_all[rownames(en_coef_all) != "(Intercept)", 1]
coef_en <- data.table(elasticNet = en_coef)  # build table
coef_en[, feature := names(en_coef)]  # add feature names
to_plot_r <- melt(
  coef_en,  # long format: one row per (feature, model)
  id.vars = "feature",
  variable.name = "model",
  value.name = "coefficient"
)
ggplot(data = to_plot_r,  # plot coefficients
       aes(x = feature, y = coefficient, fill = model)) +
  coord_flip() +
  geom_bar(stat = "identity", fill = "brown4", color = "blue") +
  facet_wrap(~ model) +
  guides(fill = "none")  # guides(fill = FALSE) is deprecated in ggplot2
#install.packages("iml")
library(iml)
Using the interpretable machine learning library (iml) to illustrate the features. iml needs a data frame, yet glmnet needs matrix-format input, so a work-around is needed.
# First attempt, kept for reference: passing the matrix `x` straight to
# Predictor$new() fails because iml requires a data.frame (see the error below).
#data1 <- as.data.frame(x)
#colnames(data1) <- expr_genes_matched
iml_predictor <- Predictor$new(en_model, data = x, y = y) # pass the x, y values from above
Error in .subset2(public_bind_env, "initialize")(...) :
Assertion on 'X' failed: Must be of type 'data.frame', not 'matrix'.
https://github.com/christophM/iml/issues/29
##adapted from the github repo above
# Prediction wrapper for iml. iml hands features over as a data.frame, whereas
# predict() on a cv.glmnet model needs a numeric matrix.
#
# object  : the fitted cv.glmnet model held by the Predictor.
# newdata : data.frame of expression features on the original (unscaled) scale,
#           with the same column names as `x`.
# returns : the prediction matrix from predict(), one row per row of `newdata`.
predict.function <- function(object, newdata) {
  newdata_x <- data.matrix(newdata)
  # en_model is fitted on scaled.x, so new data must go through the same
  # centring and scaling before prediction. Feeding unscaled expression values
  # to the model would give meaningless predictions. Columns are selected by
  # name so the constants always line up with the right gene.
  centers <- attr(scaled.x, "scaled:center")
  scales <- attr(scaled.x, "scaled:scale")
  newdata_x <- scale(
    newdata_x[, names(centers), drop = FALSE],
    center = centers,
    scale = scales
  )
  # Use the model passed in by iml rather than reaching for the global.
  predict(object, newdata_x)
}
# Keep the iml data on the original expression scale so that feature values
# shown in the explanation plots stay interpretable.
data1 <- as.data.frame(x)
colnames(data1) <- expr_genes_matched
iml_predictor <- Predictor$new(
  en_model,
  data = data1,
  y = y,
  predict.fun = predict.function
)
# Permutation feature importance of the elastic-net predictor, scored by MSE.
imp_features <- FeatureImp$new(iml_predictor, loss = "mse")
plot(imp_features)
# NOTE(review): `predictor` is not defined in the visible script (the object
# built above is `iml_predictor`), so this line would fail if re-enabled.
#shapley <- Shapley$new(predictor, x.interest = x[1,], sample.size = 10, run = TRUE)
imp_features
Interpretation method: FeatureImp
error function: mse
Analysed predictor:
Prediction task: unknown
Analysed data:
Sampling from data.frame with 790 rows and 100 columns.
Head of results:
feature importance.05 importance importance.95 permutation.error
1 PPARG..5468. 1.147243 1.169236 1.193406 0.03284529
2 GKN1..56287. 1.025909 1.027347 1.033216 0.02885945
3 PRR15..222171. 1.015853 1.021164 1.025466 0.02868577
4 HSD3B1..3283. 1.012034 1.013427 1.015288 0.02846844
5 FAM189B..10712. 1.006032 1.009651 1.011171 0.02836236
6 AMPD2..271. 1.006576 1.009572 1.010556 0.02836014
plot(imp_features)
#install.packages("gower")
# presumably needed by iml's LocalModel for its distance computation -- confirm
library(gower)
# Local surrogate (LIME-style) explanation for the first cell line, limited to
# k = 10 features.
# NOTE(review): lime.explain is not plotted or used in the visible script.
lime.explain <- LocalModel$new(iml_predictor, k=10,x.interest = data1[1, ])
# Positions (in y / data1 row order) of cell lines with PPARG gene effect below -0.5.
which(y< (-0.5))
[1] 26 209 302 329 361 400 453 491 525 529 635 637 639 647 654 664 686 756
# Local explanations for the first two strongly dependent cell lines
# (positions 26 and 209 in the which() output above).
lime.explain26 <- LocalModel$new(iml_predictor, k=10,x.interest = data1[26, ])
plot(lime.explain26)
lime.explain209 <- LocalModel$new(iml_predictor, k=10,x.interest = data1[209, ])
plot(lime.explain209)
# Load the per-cell-line sample annotations (keyed by DepMap_ID) and preview
# the first three rows.
cell_sampleinfo <- read.csv("sample_info (1).csv")
cell_sampleinfo[1:3,]
DepMap_ID cell_line_name stripped_cell_line_name
1 ACH-000001 NIH:OVCAR-3 NIHOVCAR3
2 ACH-000002 HL-60 HL60
3 ACH-000003 CACO2 CACO2
CCLE_Name alias COSMICID sex source
1 NIHOVCAR3_OVARY OVCAR3 905933 Female ATCC
2 HL60_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE 905938 Female ATCC
3 CACO2_LARGE_INTESTINE CACO2, CaCo-2 NA Male ATCC
Achilles_n_replicates cell_line_NNMD culture_type culture_medium cas9_activity
1 NA NA NA
2 NA NA NA
3 NA NA NA
RRID WTSI_Master_Cell_ID sample_collection_site
1 CVCL_0465 2201 ascites
2 CVCL_0002 55 haematopoietic_and_lymphoid_tissue
3 CVCL_0025 NA Colon
primary_or_metastasis primary_disease
1 Metastasis Ovarian Cancer
2 Primary Leukemia
3 Colon/Colorectal Cancer
Subtype age Sanger_Model_ID
1 Adenocarcinoma, high grade serous 60 SIDM00105
2 Acute Myelogenous Leukemia (AML), M3 (Promyelocytic) 35 SIDM00829
3 Adenocarcinoma NA SIDM00891
depmap_public_comments lineage lineage_subtype lineage_sub_subtype
1 ovary ovary_adenocarcinoma high_grade_serous
2 blood AML M3
3 colorectal colorectal_adenocarcinoma
lineage_molecular_subtype
1
2
3
# Index the annotations by DepMap ID, then confirm how many rows the modelling
# subset of the expression table has.
rownames(cell_sampleinfo) <- cell_sampleinfo[["DepMap_ID"]]
modelling_subset <- ccle_expr[shared_cell_lines, expr_genes_matched]
nrow(modelling_subset)
[1] 790
colnames(cell_sampleinfo)
[1] "DepMap_ID" "cell_line_name"
[3] "stripped_cell_line_name" "CCLE_Name"
[5] "alias" "COSMICID"
[7] "sex" "source"
[9] "Achilles_n_replicates" "cell_line_NNMD"
[11] "culture_type" "culture_medium"
[13] "cas9_activity" "RRID"
[15] "WTSI_Master_Cell_ID" "sample_collection_site"
[17] "primary_or_metastasis" "primary_disease"
[19] "Subtype" "age"
[21] "Sanger_Model_ID" "depmap_public_comments"
[23] "lineage" "lineage_subtype"
[25] "lineage_sub_subtype" "lineage_molecular_subtype"
# Build one plotting table: matched expression features, the PPARG gene-effect
# score, and the sample annotations, joined on the cell-line ID.
k <- ccle_expr[shared_cell_lines, expr_genes_matched]
k$PPARG_crispr <- achilles[shared_cell_lines, "PPARG..5468."]
k$ID <- rownames(k)
k <- merge(x = k, y = cell_sampleinfo, by.x = "ID", by.y = "DepMap_ID")
colnames(cell_sampleinfo)
[1] "DepMap_ID" "cell_line_name"
[3] "stripped_cell_line_name" "CCLE_Name"
[5] "alias" "COSMICID"
[7] "sex" "source"
[9] "Achilles_n_replicates" "cell_line_NNMD"
[11] "culture_type" "culture_medium"
[13] "cas9_activity" "RRID"
[15] "WTSI_Master_Cell_ID" "sample_collection_site"
[17] "primary_or_metastasis" "primary_disease"
[19] "Subtype" "age"
[21] "Sanger_Model_ID" "depmap_public_comments"
[23] "lineage" "lineage_subtype"
[25] "lineage_sub_subtype" "lineage_molecular_subtype"
cell_sampleinfo[shared_cell_lines,][1:2,]
DepMap_ID cell_line_name stripped_cell_line_name
ACH-000004 ACH-000004 HEL HEL
ACH-000005 ACH-000005 HEL 92.1.7 HEL9217
CCLE_Name alias COSMICID sex source
ACH-000004 HEL_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE 907053 Male DSMZ
ACH-000005 HEL9217_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE NA Male ATCC
Achilles_n_replicates cell_line_NNMD culture_type culture_medium
ACH-000004 2 -3.079202 Suspension RPMI + 10% FBS
ACH-000005 2 -2.404409 Suspension RPMI + 10% FBS
cas9_activity RRID WTSI_Master_Cell_ID
ACH-000004 52.4 CVCL_0001 783
ACH-000005 86.6 CVCL_2481 NA
sample_collection_site primary_or_metastasis primary_disease
ACH-000004 haematopoietic_and_lymphoid_tissue Leukemia
ACH-000005 bone_marrow Leukemia
Subtype age
ACH-000004 Acute Myelogenous Leukemia (AML), M6 (Erythroleukemia) 30
ACH-000005 Acute Myelogenous Leukemia (AML), M6 (Erythroleukemia) 30
Sanger_Model_ID depmap_public_comments lineage lineage_subtype
ACH-000004 SIDM00594 blood AML
ACH-000005 SIDM00593 blood AML
lineage_sub_subtype lineage_molecular_subtype
ACH-000004 M6
ACH-000005 M6
# Put the annotations in the same row order as `y`, pick out the strongly
# dependent cell lines (PPARG gene effect below -0.5), and record each one's
# position in `y` / `data1` so it can be passed to the explainers.
cell_sampleinfo_in_the_same_order <- cell_sampleinfo[shared_cell_lines, ]
index_less_than_minus_point_five <- which(y < (-0.5))
sample_records_less_than_minus_point_five <-
  cell_sampleinfo_in_the_same_order[index_less_than_minus_point_five, ]
sample_records_less_than_minus_point_five[["Index_number"]] <-
  index_less_than_minus_point_five
colnames(sample_records_less_than_minus_point_five)
[1] "DepMap_ID" "cell_line_name"
[3] "stripped_cell_line_name" "CCLE_Name"
[5] "alias" "COSMICID"
[7] "sex" "source"
[9] "Achilles_n_replicates" "cell_line_NNMD"
[11] "culture_type" "culture_medium"
[13] "cas9_activity" "RRID"
[15] "WTSI_Master_Cell_ID" "sample_collection_site"
[17] "primary_or_metastasis" "primary_disease"
[19] "Subtype" "age"
[21] "Sanger_Model_ID" "depmap_public_comments"
[23] "lineage" "lineage_subtype"
[25] "lineage_sub_subtype" "lineage_molecular_subtype"
[27] "Index_number"
unique(sample_records_less_than_minus_point_five$primary_disease)
[1] Pancreatic Cancer Lung Cancer Bladder Cancer
[4] Esophageal Cancer Colon/Colorectal Cancer Endometrial/Uterine Cancer
[7] Bile Duct Cancer
35 Levels: Adrenal Cancer Bile Duct Cancer Bladder Cancer ... Unknown
sample_records_less_than_minus_point_five
DepMap_ID cell_line_name stripped_cell_line_name CCLE_Name
ACH-000042 ACH-000042 Panc 02.03 PANC0203 PANC0203_PANCREAS
ACH-000395 ACH-000395 NCI-H520 NCIH520 NCIH520_LUNG
ACH-000547 ACH-000547 HT-1197 HT1197 HT1197_URINARY_TRACT
ACH-000599 ACH-000599 PA-TU-8902 PATU8902 PATU8902_PANCREAS
ACH-000652 ACH-000652 SUIT-2 SUIT2 SUIT2_PANCREAS
ACH-000724 ACH-000724 HT-1376 HT1376 HT1376_URINARY_TRACT
ACH-000809 ACH-000809 KYSE-410 KYSE410 KYSE410_OESOPHAGUS
ACH-000862 ACH-000862 KMBC-2 KMBC2 KMBC2_URINARY_TRACT
ACH-000916 ACH-000916 NCI-H1573 NCIH1573 NCIH1573_LUNG
ACH-000926 ACH-000926 HT55 HT55 HT55_LARGE_INTESTINE
ACH-001375 ACH-001375 PACADD-119 PACADD119 PACADD119_PANCREAS
ACH-001379 ACH-001379 PACADD-161 PACADD161 PACADD161_PANCREAS
ACH-001382 ACH-001382 PACADD-188 PACADD188 PACADD188_PANCREAS
ACH-001408 ACH-001408 UM-UC-14 UMUC14 UMUC14_URINARY_TRACT
ACH-001416 ACH-001416 UM-UC9 UMUC9 UMUC9_URINARY_TRACT
ACH-001458 ACH-001458 C75 C75 C75_LARGE_INTESTINE
ACH-001530 ACH-001530 JEG-3 JEG3 JEG3_PLACENTA
ACH-001842 ACH-001842 ICC2 ICC2 ICC2_BILIARY_TRACT
alias COSMICID sex source Achilles_n_replicates
ACH-000042 1298475 Female ATCC 4
ACH-000395 908443 Male ATCC 3
ACH-000547 907065 Male ATCC 2
ACH-000599 1298526 Female DSMZ 1
ACH-000652 1240219 Male HSRRB 2
ACH-000724 907066 Female ATCC 2
ACH-000809 753574 Male DSMZ 2
ACH-000862 NA Unknown HSRRB 2
ACH-000916 908472 Female ATCC 2
ACH-000926 907287 Unknown ECACC 3
ACH-001375 NA Male DSMZ 2
ACH-001379 NA Female DSMZ 2
ACH-001382 NA Female DSMZ 2
ACH-001408 NA Male Sigma-Aldrich 2
ACH-001416 NA Male Sigma-Aldrich 2
ACH-001458 NA Male ECACC 2
ACH-001530 907176 Male ATCC 1
ACH-001842 NA Unknown Academic lab 2
cell_line_NNMD culture_type
ACH-000042 -3.342297 Adherent
ACH-000395 -2.777346 Adherent
ACH-000547 -2.868265 Adherent
ACH-000599 -3.174826 Adherent
ACH-000652 -3.313006 Adherent
ACH-000724 -2.699458 Adherent
ACH-000809 -4.254809 Adherent
ACH-000862 -3.879925
ACH-000916 -1.349888 Adherent
ACH-000926 -2.824135 Adherent
ACH-001375 -4.033929 Adherent
ACH-001379 -3.950776 Adherent
ACH-001382 -1.581117 Adherent
ACH-001408 -4.462550 Adherent
ACH-001416 -2.919826 Adherent
ACH-001458 -2.296308 Adherent
ACH-001530 -3.590270 Adherent
ACH-001842 -1.876980 Adherent
culture_medium
ACH-000042 RPMI + 10% FBS + 1mM Sodium pyruvate
ACH-000395 RPMI + 10% FBS
ACH-000547 EMEM + 10% FBS
ACH-000599 DMEM + 10% FBS
ACH-000652 RPMI + 10% FBS
ACH-000724 EMEM + 10% FBS
ACH-000809 RPMI + 10% FBS
ACH-000862 DMEM + 10% FBS
ACH-000916 RPMI + 5% FBS
ACH-000926 EMEM + 20% FBS + Glutamine + NEAA
ACH-001375 DMEM:Keratinocyte SFM (1:1)+20%FBS
ACH-001379 DMEM:Keratinocyte SFM (1:1)+20%FBS
ACH-001382 DMEM:Keratinocyte SFM (1:1)+20%FBS
ACH-001408 EMEM (EBSS) + 10% FBS + 2 mM Glutamine + 1% Non Essential Amino Acids (NEAA)
ACH-001416 EMEM (EBSS) + 10% FBS + 2 mM Glutamine + 1% Non Essential Amino Acids (NEAA)
ACH-001458 IMDM + 10% FBS + 2 mM Glutamine
ACH-001530 EMEM + 10% FBS
ACH-001842 RPMI + 10% FBS
cas9_activity RRID WTSI_Master_Cell_ID sample_collection_site
ACH-000042 88.9 CVCL_1633 1838 pancreas
ACH-000395 86.6 CVCL_1566 2200 lung
ACH-000547 72.0 CVCL_1291 1533 urinary_tract
ACH-000599 55.0 CVCL_1845 1549 pancreas
ACH-000652 75.4 CVCL_3172 1749 liver
ACH-000724 51.4 CVCL_1292 1211 urinary_tract
ACH-000809 91.3 CVCL_1352 952 oesophagus
ACH-000862 70.6 CVCL_2977 NA urinary_tract
ACH-000916 52.0 CVCL_1478 372 soft_tissue
ACH-000926 69.4 CVCL_1294 1688 large_intestine
ACH-001375 72.7 CVCL_1848 NA pancreas
ACH-001379 65.0 CVCL_M466 NA liver
ACH-001382 45.3 CVCL_M469 NA pancreas
ACH-001408 70.5 CVCL_2747 NA urinary_tract
ACH-001416 76.2 CVCL_2753 NA urinary_tract
ACH-001458 42.9 CVCL_5248 NA large_intestine
ACH-001530 95.1 CVCL_0363 1195 central_nervous_system
ACH-001842 69.1 CVCL_VV27 NA biliary_tract
primary_or_metastasis primary_disease
ACH-000042 Primary Pancreatic Cancer
ACH-000395 Primary Lung Cancer
ACH-000547 Primary Bladder Cancer
ACH-000599 Primary Pancreatic Cancer
ACH-000652 Metastasis Pancreatic Cancer
ACH-000724 Primary Bladder Cancer
ACH-000809 Primary Esophageal Cancer
ACH-000862 Primary Bladder Cancer
ACH-000916 Metastasis Lung Cancer
ACH-000926 Primary Colon/Colorectal Cancer
ACH-001375 Primary Pancreatic Cancer
ACH-001379 Metastasis Pancreatic Cancer
ACH-001382 Metastasis Pancreatic Cancer
ACH-001408 Metastasis Bladder Cancer
ACH-001416 Primary Bladder Cancer
ACH-001458 Primary Colon/Colorectal Cancer
ACH-001530 Metastasis Endometrial/Uterine Cancer
ACH-001842 Bile Duct Cancer
Subtype age
ACH-000042 Ductal Adenocarcinoma, exocrine 70
ACH-000395 Non-Small Cell Lung Cancer (NSCLC), Squamous Cell Carcinoma NA
ACH-000547 Carcinoma 44
ACH-000599 Ductal Adenocarcinoma, exocrine 44
ACH-000652 Ductal Adenocarcinoma, exocrine 73
ACH-000724 Carcinoma 58
ACH-000809 Squamous Cell Carcinoma 51
ACH-000862 Carcinoma NA
ACH-000916 Non-Small Cell Lung Cancer (NSCLC), Adenocarcinoma 35
ACH-000926 Adenocarcinoma NA
ACH-001375 Ductal Adenocarcinoma, exocrine 59
ACH-001379 Ductal Adenocarcinoma, exocrine 63
ACH-001382 Ductal Adenocarcinoma, exocrine 68
ACH-001408 Transitional Cell Carcinoma NA
ACH-001416 Transitional Cell Carcinoma NA
ACH-001458 Adenocarcinoma 56
ACH-001530 Choriocarcinoma NA
ACH-001842 Cholangiocarcinoma, intrahepatic NA
Sanger_Model_ID depmap_public_comments lineage
ACH-000042 SIDM01139 pancreas
ACH-000395 SIDM01130 lung
ACH-000547 SIDM00676 urinary_tract
ACH-000599 SIDM00455 pancreas
ACH-000652 SIDM00371 pancreas
ACH-000724 SIDM00678 urinary_tract
ACH-000809 SIDM01028 esophagus
ACH-000862 urinary_tract
ACH-000916 SIDM00749 lung
ACH-000926 SIDM00541 colorectal
ACH-001375 pancreas
ACH-001379 pancreas
ACH-001382 pancreas
ACH-001408 urinary_tract
ACH-001416 urinary_tract
ACH-001458 colorectal
ACH-001530 SIDM01218 uterus
ACH-001842 bile_duct
lineage_subtype lineage_sub_subtype
ACH-000042 exocrine exocrine_adenocarcinoma
ACH-000395 NSCLC NSCLC_squamous
ACH-000547 bladder_carcinoma
ACH-000599 exocrine exocrine_adenocarcinoma
ACH-000652 exocrine exocrine_adenocarcinoma
ACH-000724 bladder_carcinoma
ACH-000809 esophagus_squamous
ACH-000862 bladder_carcinoma
ACH-000916 NSCLC NSCLC_adenocarcinoma
ACH-000926 colorectal_adenocarcinoma
ACH-001375 exocrine exocrine_adenocarcinoma
ACH-001379 exocrine exocrine_adenocarcinoma
ACH-001382 exocrine exocrine_adenocarcinoma
ACH-001408 bladder_carcinoma bladder_transitional_cell
ACH-001416 bladder_carcinoma bladder_transitional_cell
ACH-001458 colorectal_adenocarcinoma
ACH-001530 choriocarcinoma
ACH-001842 cholangiocarcinoma intrahepatic
lineage_molecular_subtype Index_number
ACH-000042 26
ACH-000395 209
ACH-000547 302
ACH-000599 329
ACH-000652 361
ACH-000724 400
ACH-000809 453
ACH-000862 491
ACH-000916 525
ACH-000926 529
ACH-001375 635
ACH-001379 637
ACH-001382 639
ACH-001408 647
ACH-001416 654
ACH-001458 664
ACH-001530 686
ACH-001842 756
# Row positions (in y / data1) of the strongly dependent pancreatic cancer
# lines; the two look-ups that follow do the same for bladder and colorectal.
sample_records_less_than_minus_point_five[sample_records_less_than_minus_point_five$primary_disease %in% c("Pancreatic Cancer"),]$Index_number
[1] 26 329 361 635 637 639
sample_records_less_than_minus_point_five[sample_records_less_than_minus_point_five$primary_disease %in% c("Bladder Cancer"),]$Index_number
[1] 302 400 491 647 654
sample_records_less_than_minus_point_five[sample_records_less_than_minus_point_five$primary_disease %in% c("Colon/Colorectal Cancer"),]$Index_number
[1] 529 664
# Local explanations (k = 10 features) for the strongly dependent bladder
# cancer lines: positions 302, 400, 491, 647 and 654.
lime.explain302 <- LocalModel$new(iml_predictor, k=10,x.interest = data1[302, ])
plot(lime.explain302)
lime.explain400 <- LocalModel$new(iml_predictor, k=10,x.interest = data1[400, ])
plot(lime.explain400)
lime.explain491 <- LocalModel$new(iml_predictor, k=10,x.interest = data1[491, ])
plot(lime.explain491)
lime.explain647 <- LocalModel$new(iml_predictor, k=10,x.interest = data1[647, ])
Had to choose a smaller k
plot(lime.explain647)
lime.explain654 <- LocalModel$new(iml_predictor, k=10,x.interest = data1[654, ])
plot(lime.explain654)
# Local explanations for the strongly dependent pancreatic cancer lines:
# positions 26, 329, 361, 635, 637 and 639.
# NOTE(review): lime.explain26 was already built and plotted earlier; this
# repeats it.
lime.explain26 <- LocalModel$new(iml_predictor, k=10,x.interest = data1[26, ])
plot(lime.explain26)
lime.explain329 <- LocalModel$new(iml_predictor, k=10,x.interest = data1[329, ])
plot(lime.explain329)
lime.explain361 <- LocalModel$new(iml_predictor, k=10,x.interest = data1[361, ])
plot(lime.explain361)
lime.explain635 <- LocalModel$new(iml_predictor, k=10,x.interest = data1[635, ])
plot(lime.explain635)
lime.explain637 <- LocalModel$new(iml_predictor, k=10,x.interest = data1[637, ])
Had to choose a smaller k
plot(lime.explain637)
lime.explain639 <- LocalModel$new(iml_predictor, k=10,x.interest = data1[639, ])
Had to choose a smaller k
plot(lime.explain639)
which(y>0.5)
[1] 318 709
## explain a bladder sample (position 302) with Shapley values
shapley302 <- Shapley$new(iml_predictor, x.interest = data1[302, ])
plot(shapley302)
## explain a pancreatic sample (position 26) with Shapley values
shapley26 <- Shapley$new(iml_predictor, x.interest = data1[26, ])
plot(shapley26)
# PPARG expression (x) against PPARG CRISPR gene effect (y) for every shared
# cell line. Point size encodes the expression of one candidate biomarker per
# plot; point shape encodes the primary disease.
# NOTE(review): primary_disease has more than six levels, and ggplot2's default
# shape scale supplies only six shapes -- the remaining groups are likely
# dropped with a warning. Consider colour, or scale_shape_manual(). Confirm.
ggplot(k) +
  geom_point(aes(x = PPARG..5468., y = PPARG_crispr,
                 size = PPARG..5468., shape = primary_disease))
ggplot(k) +
  geom_point(aes(x = PPARG..5468., y = PPARG_crispr,
                 size = GKN1..56287., shape = primary_disease))
# Refer to the column by bare name: `k$...` inside aes() bypasses the plot's
# data binding and is discouraged by ggplot2.
ggplot(k) +
  geom_point(aes(x = PPARG..5468., y = PPARG_crispr,
                 size = PRR15..222171., shape = primary_disease))
ggplot(k) +
  geom_point(aes(x = PPARG..5468., y = PPARG_crispr,
                 size = AMPD2..271., shape = primary_disease))
ggplot(k) +
  geom_point(aes(x = PPARG..5468., y = PPARG_crispr,
                 size = FOXQ1..94234., shape = primary_disease))
# Distribution of PPARG gene effect per primary disease; x labels are rotated
# so they stay readable.
ggplot(k) +
  geom_boxplot(aes(x = primary_disease, y = PPARG_crispr)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
en_model
Call: cv.glmnet(x = scaled.x, y = y, alpha = 0.5)
Measure: Mean-Squared Error
Lambda Index Measure SE Nonzero
min 0.01273 26 0.02797 0.002306 22
1se 0.04681 12 0.03002 0.002671 18
plot(en_model)
# Map the CRISPR-associated gene symbols onto Achilles column names, which
# carry the symbol followed by a numeric ID suffix (e.g. "RXRA..6256.").
crispr_pattern <- paste0(crispr_genes, "\\..%")
achilles_colnames <- colnames(achilles)
is_crispr_gene_col <- achilles_colnames %like any% crispr_pattern
crispr_genes_matched <- achilles_colnames[is_crispr_gene_col]
crispr_genes_matched
[1] "ABI1..10006." "AFF1..4299." "AJUBA..84962." "ANKRD33..341405."
[5] "APOD..347." "ARFRP1..10139." "ARHGEF7..8874." "BNIP3L..665."
[9] "BRK1..55845." "BUB3..9184." "BZW2..28969." "C1QL4..338761."
[13] "CAMK1..8536." "CAND2..23066." "CAP1..10487." "CASKIN2..57513."
[17] "CDC25C..995." "CHL1..10752." "CHMP4B..128866." "CIDEC..63924."
[21] "CPNE7..27132." "CRKL..1399." "CYP2W1..54905." "DENND3..22898."
[25] "DOCK5..80005." "DVL3..1857." "DYNLRB1..83658." "EFR3A..23167."
[29] "EGFR..1956." "ELMO3..79767." "ERRFI1..54206." "EVA1C..59271."
[33] "EXOG..9941." "FAM92B..339145." "FANCD2OS..115795." "FARP1..10160."
[37] "FERMT1..55612." "FOXQ1..94234." "GADL1..339896." "GATD1..347862."
[41] "GGA3..23163." "GLI4..2738." "GRHL2..79977." "GRK2..156."
[45] "HEG1..57493." "HOXA9..3205." "ILK..3611." "INS..3630."
[49] "IQSEC1..9922." "ITGA3..3675." "KAT2B..8850." "KCNT2..343450."
[53] "KLF5..688." "KRAS..3845." "LIMD1..8994." "LRRC2..79442."
[57] "LRRC49..54839." "MKRN2..23609." "NACA..4666." "NCEH1..57552."
[61] "NKIRAS1..28512." "OCM..654231." "OR10G7..390265." "OSBPL11..114885."
[65] "PDCD10..11235." "PLD6..201164." "PTPRE..5791." "RAB10..10890."
[69] "RAD51..5888." "RAF1..5894." "RXRA..6256." "SEC11A..23478."
[73] "SEMA4B..10509." "SERPIND1..3053." "SGMS2..166929." "SLC5A11..115584."
[77] "SLURP2..432355." "SOX13..9580." "SRSF11..9295." "STT3A..3703."
[81] "SYT10..341359." "TGM2..7052." "THRB..7068." "TIMP4..7079."
[85] "TMEM40..55287." "TMEM42..131616." "TPPP2..122664." "TRPM2..7226."
[89] "TUBB4B..10383." "TXLNA..200081." "TXNRD1..7296." "UBE2R2..54926."
[93] "UPF3A..65110." "USP4..7375." "YES1..7525." "ZCWPW2..152098."
[97] "ZIC5..85416." "ZNF346..23567."